# -*- coding:utf-8 -*-
# @FileName :tokenizer.py
# @Date: 2024/7/31
# @Author:天空之城
# Address tokenization module
from address_normalizer.app.config import TOKENIZE_MODEL


class Tokenizer:
    """Address tokenizer that dispatches to the backend selected by TOKENIZE_MODEL.

    Currently only the 'jieba' backend is implemented; any other configured
    value raises ValueError instead of silently returning None.
    """

    @staticmethod
    def tokenize_jieba(text):
        """Segment *text* with jieba in accurate (non-full) mode.

        :param text: the address string to segment
        :return: list of token strings
        """
        # Local import so jieba is only required when this backend is used.
        import jieba
        return list(jieba.cut(text, cut_all=False))

    @staticmethod
    def tokenize(text):
        """Tokenize *text* with the backend named by TOKENIZE_MODEL.

        :param text: the address string to segment
        :return: list of token strings
        :raises ValueError: if TOKENIZE_MODEL names an unknown backend
        """
        if TOKENIZE_MODEL == 'jieba':
            return Tokenizer.tokenize_jieba(text)
        # The original fell through and returned None here, which made a
        # config typo surface as a confusing crash in callers; fail loudly.
        raise ValueError(f"Unsupported TOKENIZE_MODEL: {TOKENIZE_MODEL!r}")


# Quick manual check
if __name__ == '__main__':
    sample_address = "北京市海淀区中关村大街1号"
    tokens = Tokenizer.tokenize(sample_address)
    print(tokens)
    # expected: ['北京市', '海淀区', '中关村大街', '1号']